# Provenance (from the archive listing this file was taken from):
#   Amiga Format CD 39 (1999-04-13, Future Publishing, GB; issue 1999-05)
#   -seriously_amiga-/graphics/ripley/source/idctppc.asm
#   Assembly source file, dated 1999-03-02, 9KB, 537 lines
#********************************************************************
#
# idctppc.asm
#
# 1.0 - 20.9.98 (copper) written in assembler
#
#********************************************************************
# Entry points exported to the linker.
.globl Initialize_Fast_IDCT
.globl Fast_IDCT
# Fixed-point multipliers used by the idctrow/idctcol macros.
# NOTE(review): the values look like 2048*sqrt(2)*cos(k*pi/16) rounded to
# integers (k = 1,2,3,5,6,7) - confirm against the C reference.
.set W1,2841
.set W2,2676
.set W3,2408
.set W5,1609
.set W6,1108
.set W7,565
# Storage for the clipping table. Initialize_Fast_IDCT writes halfwords
# starting from iclip; idctcol reads halfwords at signed offsets from iclp.
# NOTE(review): presumably the operands are name,size-in-bytes,alignment, and
# the code relies on iclp being placed directly after iclip so that iclp is
# the centre of one 2048-byte table - keep these two lines together and in
# this order, and confirm the layout in the link map.
.bss iclip,1024,4
.bss iclp,1024,4
# Register aliases used by the macros: blk is the address of the row/column
# being transformed, x0-x8 are the working values of the transform.
.set blk,r12
.set x0,r14
.set x1,r15
.set x2,r16
.set x3,r17
.set x4,r18
.set x5,r19
.set x6,r20
.set x7,r21
.set x8,r11
#********************************************************************
.text
Initialize_Fast_IDCT:
#-----------------------------------------------------------------------
# Build the saturation table that idctcol uses to clip its results.
#
# C equivalent:
#     for (i = -512; i < 512; i++)
#         iclp[i] = (i < -256) ? -256 : ((i > 255) ? 255 : i);
#
# The table is 1024 signed halfwords (2048 bytes) written upwards from
# iclip:
#     entries    0.. 255 : -256            (lower plateau)
#     entries  256.. 767 : -256 .. 255     (identity ramp)
#     entries  768..1023 :  255            (upper plateau)
# idctcol indexes the table relative to iclp, so entry 512 (value 0) must
# be at iclp, i.e. iclp must start exactly 1024 bytes above iclip.
# NOTE(review): this relies on the two .bss areas being placed back to
# back - confirm in the link map.
#
# In:    nothing
# Out:   nothing
# Clobb: r3, r4, r5, CTR (caller-saved in the SysV PowerPC ABI); no stack
#        is used, so no prologue/epilogue is needed.
#-----------------------------------------------------------------------
        lis     r3,iclip@ha
        addi    r3,r3,iclip@l
        subi    r3,r3,2                 # sthu pre-increments by 2, so start one halfword low

        li      r4,-256                 # lower plateau value, also the first ramp value
        li      r5,256
        mtctr   r5
iniloop:
        sthu    r4,2(r3)                # entries 0..255 = -256
        bdnz    iniloop

        li      r5,512
        mtctr   r5
iniloop1:
        sthu    r4,2(r3)                # entries 256..767 = -256..255
        addi    r4,r4,1
        bdnz    iniloop1

        li      r4,255                  # upper plateau value
        li      r5,256
        mtctr   r5
iniloop2:
        sthu    r4,2(r3)                # entries 768..1023 = 255
        bdnz    iniloop2

        blr
#********************************************************************
.macro idctrow
#-----------------------------------------------------------------------
# Row pass: one-dimensional inverse DCT over the eight signed 16-bit
# coefficients blk[0..7], computed in place.
#
# In:      blk = address of the row
# Out:     blk[0..7] replaced by the transformed row
# Scratch: x0-x8, r31, CR0, XER[CA]
# The labels below are ordinary labels, so this macro can only be
# expanded once per file.
#-----------------------------------------------------------------------
# Fetch the seven non-DC terms (sign-extended); x1 is pre-scaled:
#   x1 = blk[4]<<11, x2 = blk[6], x3 = blk[2], x4 = blk[1],
#   x5 = blk[7],     x6 = blk[5], x7 = blk[3]
        lha     x1,4*2(blk)
        slwi    x1,x1,11
        lha     x2,6*2(blk)
        lha     x3,2*2(blk)
        lha     x4,1*2(blk)
        lha     x5,7*2(blk)
        lha     x6,5*2(blk)
        lha     x7,3*2(blk)
# Shortcut test: if x1|x2|...|x7 is zero only the DC term contributes.
        or      r31,x1,x2
        or      r31,r31,x3
        or      r31,r31,x4
        or      r31,r31,x5
        or      r31,r31,x6
        or.     r31,r31,x7              # record form: CR0 = result compared with 0
        bne     row_full
# DC-only row:
#   blk[0] = blk[1] = ... = blk[7] = blk[0]<<3
        lha     r31,0*2(blk)
        slwi    r31,r31,3
        sth     r31,0*2(blk)
        sth     r31,1*2(blk)
        sth     r31,2*2(blk)
        sth     r31,3*2(blk)
        sth     r31,4*2(blk)
        sth     r31,5*2(blk)
        sth     r31,6*2(blk)
        sth     r31,7*2(blk)
        b       row_done
row_full:
# x0 = (blk[0]<<11) + 128   (the +128 rounds the >>8 of the fourth stage)
        lha     x0,0*2(blk)
        slwi    x0,x0,11
        addi    x0,x0,128
# --- first stage ---
#   x8 = W7*(x4+x5)
#   x4 = x8 + (W1-W7)*x4
#   x5 = x8 - (W1+W7)*x5
        add     x8,x4,x5
        mulli   x4,x4,(W1-W7)
        mulli   x5,x5,(W1+W7)
        mulli   x8,x8,W7
        add     x4,x4,x8
        sub     x5,x8,x5
#   x8 = W3*(x6+x7)
#   x6 = x8 - (W3-W5)*x6
#   x7 = x8 - (W3+W5)*x7
        add     x8,x6,x7
        mulli   x6,x6,(W3-W5)
        mulli   x7,x7,(W3+W5)
        mulli   x8,x8,W3
        sub     x6,x8,x6
        sub     x7,x8,x7
# --- second stage ---
#   x8 = x0 + x1
#   x0 -= x1
        add     x8,x0,x1
        sub     x0,x0,x1
#   x1 = W6*(x3+x2)
#   x2 = x1 - (W2+W6)*x2
#   x3 = x1 + (W2-W6)*x3
        add     x1,x3,x2
        mulli   x2,x2,(W2+W6)
        mulli   x3,x3,(W2-W6)
        mulli   x1,x1,W6
        sub     x2,x1,x2
        add     x3,x3,x1
#   x1 = x4 + x6
#   x4 -= x6
#   x6 = x5 + x7
#   x5 -= x7
        add     x1,x4,x6
        sub     x4,x4,x6
        add     x6,x5,x7
        sub     x5,x5,x7
# --- third stage ---
#   x7 = x8 + x3
#   x8 -= x3
#   x3 = x0 + x2
#   x0 -= x2
        add     x7,x8,x3
        sub     x8,x8,x3
        add     x3,x0,x2
        sub     x0,x0,x2
#   x2 = (181*(x4+x5)+128)>>8
#   x4 = (181*(x4-x5)+128)>>8
        add     x2,x4,x5
        sub     x4,x4,x5
        mulli   x2,x2,181
        mulli   x4,x4,181
        addi    x2,x2,128
        addi    x4,x4,128
        srawi   x2,x2,8
        srawi   x4,x4,8
# --- fourth stage ---
# Each result is shifted down by 8 and its low 16 bits are stored.
#   blk[0] = (x7+x1)>>8
        add     r31,x7,x1
        srawi   r31,r31,8
        sth     r31,0*2(blk)
#   blk[1] = (x3+x2)>>8
        add     r31,x3,x2
        srawi   r31,r31,8
        sth     r31,1*2(blk)
#   blk[2] = (x0+x4)>>8
        add     r31,x0,x4
        srawi   r31,r31,8
        sth     r31,2*2(blk)
#   blk[3] = (x8+x6)>>8
        add     r31,x8,x6
        srawi   r31,r31,8
        sth     r31,3*2(blk)
#   blk[4] = (x8-x6)>>8
        sub     r31,x8,x6
        srawi   r31,r31,8
        sth     r31,4*2(blk)
#   blk[5] = (x0-x4)>>8
        sub     r31,x0,x4
        srawi   r31,r31,8
        sth     r31,5*2(blk)
#   blk[6] = (x3-x2)>>8
        sub     r31,x3,x2
        srawi   r31,r31,8
        sth     r31,6*2(blk)
#   blk[7] = (x7-x1)>>8
        sub     r31,x7,x1
        srawi   r31,r31,8
        sth     r31,7*2(blk)
row_done:
.endm
#-----------------------------------------------------------------
.macro idctcol
#-----------------------------------------------------------------------
# Column pass: one-dimensional inverse DCT over the eight signed 16-bit
# values blk[8*0], blk[8*1], ..., blk[8*7] (stride of 8 halfwords),
# computed in place. Every result is passed through the halfword table
# addressed relative to iclp before it is stored.
#
# In:      blk = address of the top element of the column
#          the table around iclp must already be filled
#          (Initialize_Fast_IDCT writes it)
# Out:     the eight column elements replaced by the clipped results
# Scratch: x0-x8, r29, r30, r31, CR0, XER[CA]
# All intermediates x0-x8 are full 32-bit values; they must not be
# narrowed to 16 bits before the final table lookup.
# The labels below are ordinary labels, so this macro can only be
# expanded once per file.
#-----------------------------------------------------------------------
# /* shortcut */
# if (!((x1 = (blk[8*4]<<8)) | (x2 = blk[8*6]) | (x3 = blk[8*2]) |
#       (x4 = blk[8*1]) | (x5 = blk[8*7]) | (x6 = blk[8*5]) | (x7 = blk[8*3])))
        lha     x1,8*4*2(blk)
        slwi    x1,x1,8
        mr      r31,x1
        lha     x2,8*6*2(blk)
        or      r31,r31,x2
        lha     x3,8*2*2(blk)
        or      r31,r31,x3
        lha     x4,8*1*2(blk)
        or      r31,r31,x4
        lha     x5,8*7*2(blk)
        or      r31,r31,x5
        lha     x6,8*5*2(blk)
        or      r31,r31,x6
        lha     x7,8*3*2(blk)
        or      r31,r31,x7
        cmpwi   r31,0
        bne     colnozero
# DC-only column:
#   blk[8*0]=blk[8*1]=...=blk[8*7] = iclp[(blk[8*0]+32)>>6];
        lis     r30,iclp@ha
        addi    r30,r30,iclp@l
        lha     r31,8*0*2(blk)
        addi    r31,r31,32
        srawi   r31,r31,6
        slwi    r31,r31,1               # halfword index -> byte offset
        lhzx    r31,r30,r31             # sth below keeps only the low 16 bits
        sth     r31,8*0*2(blk)
        sth     r31,8*1*2(blk)
        sth     r31,8*2*2(blk)
        sth     r31,8*3*2(blk)
        sth     r31,8*4*2(blk)
        sth     r31,8*5*2(blk)
        sth     r31,8*6*2(blk)
        sth     r31,8*7*2(blk)
        b       ok1
colnozero:
#   x0 = (blk[8*0]<<8) + 8192;
        lha     x0,8*0*2(blk)
        slwi    x0,x0,8
        addi    x0,x0,8192
# /* first stage */
#   x8 = W7*(x4+x5) + 4;
#   x4 = (x8+(W1-W7)*x4)>>3;
        add     x8,x4,x5
        mulli   x8,x8,W7
        addi    x8,x8,4
        mulli   x4,x4,(W1-W7)
        add     x4,x8,x4
        srawi   x4,x4,3
#   x5 = (x8-(W1+W7)*x5)>>3;
#   x8 = W3*(x6+x7) + 4;
        mulli   x5,x5,(W1+W7)
        sub     x5,x8,x5
        srawi   x5,x5,3
        add     x8,x6,x7
        mulli   x8,x8,W3
        addi    x8,x8,4
#   x6 = (x8-(W3-W5)*x6)>>3;
#   x7 = (x8-(W3+W5)*x7)>>3;
        mulli   x6,x6,(W3-W5)
        sub     x6,x8,x6
        srawi   x6,x6,3
        mulli   x7,x7,(W3+W5)
        sub     x7,x8,x7
        srawi   x7,x7,3
# /* second stage */
#   x8 = x0 + x1;
#   x0 -= x1;
        add     x8,x0,x1
        sub     x0,x0,x1
#   x1 = W6*(x3+x2) + 4;
#   x2 = (x1-(W2+W6)*x2)>>3;
        add     x1,x3,x2
        mulli   x1,x1,W6
        addi    x1,x1,4
        mulli   x2,x2,(W2+W6)
        sub     x2,x1,x2
        srawi   x2,x2,3
#   x3 = (x1+(W2-W6)*x3)>>3;
#   x1 = x4 + x6;
#   x4 -= x6;
#   x6 = x5 + x7;
#   x5 -= x7;
        mulli   x3,x3,(W2-W6)
        add     x3,x1,x3
        srawi   x3,x3,3
        add     x1,x4,x6
        sub     x4,x4,x6
        add     x6,x5,x7
        sub     x5,x5,x7
# /* third stage */
#   x7 = x8 + x3;
#   x8 -= x3;
#   x3 = x0 + x2;
#   x0 -= x2;
        add     x7,x8,x3
        sub     x8,x8,x3
        add     x3,x0,x2
        sub     x0,x0,x2
#   x2 = (181*(x4+x5)+128)>>8;
#   x4 = (181*(x4-x5)+128)>>8;
# Both stay 32-bit: they are combined with x3/x0 and shifted by 14 below.
        add     x2,x4,x5
        mulli   x2,x2,181
        addi    x2,x2,128
        srawi   x2,x2,8
        sub     x4,x4,x5
        mulli   x4,x4,181
        addi    x4,x4,128
        srawi   x4,x4,8
# /* fourth stage */
# Each sum is shifted down by 14, turned into a byte offset (*2) and used
# to fetch the stored value from the table at iclp.
        lis     r30,iclp@ha
        addi    r30,r30,iclp@l
#   blk[8*0] = iclp[(x7+x1)>>14];
        add     r31,x7,x1
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*0*2(blk)
#   blk[8*1] = iclp[(x3+x2)>>14];
        add     r31,x3,x2
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*1*2(blk)
#   blk[8*2] = iclp[(x0+x4)>>14];
        add     r31,x0,x4
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*2*2(blk)
#   blk[8*3] = iclp[(x8+x6)>>14];
        add     r31,x8,x6
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*3*2(blk)
#   blk[8*4] = iclp[(x8-x6)>>14];
        sub     r31,x8,x6
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*4*2(blk)
#   blk[8*5] = iclp[(x0-x4)>>14];
        sub     r31,x0,x4
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*5*2(blk)
#   blk[8*6] = iclp[(x3-x2)>>14];
        sub     r31,x3,x2
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*6*2(blk)
#   blk[8*7] = iclp[(x7-x1)>>14];
        sub     r31,x7,x1
        srawi   r31,r31,14
        slwi    r31,r31,1
        lhzx    r29,r30,r31
        sth     r29,8*7*2(blk)
ok1:
.endm
#-----------------------------------------------------------------------
.align 4
Fast_IDCT:
#-----------------------------------------------------------------------
# Two-dimensional 8x8 inverse DCT, in place: eight row passes (idctrow)
# followed by eight column passes (idctcol).
#
# C equivalent (presumably `void Fast_IDCT(short *block)` - confirm with
# the callers):
#     for (i = 0; i < 8; i++) idctrow(block + 8*i);
#     for (i = 0; i < 8; i++) idctcol(block + i);
#
# In:    r3 = address of the block: 64 signed halfwords, row-major,
#             8 halfwords (16 bytes) per row
#        the table around iclp must have been filled beforehand
#        (Initialize_Fast_IDCT does that)
# Out:   block overwritten with the transform result
# Clobb: r5 (loop counter), r11, r12, CR0, XER[CA]
# Saved: r14-r31. The macros use r14-r21 and r29-r31, all of which are
#        callee-saved in the SysV PowerPC ABI, so the whole range is
#        stored with one stmw and reloaded with one lmw.
# Stack: 80 bytes = back chain word + LR slot (unused, leaf routine)
#        + 18 saved registers; keeps r1 16-byte aligned.
#-----------------------------------------------------------------------
# int i;
.set i,r5
        stwu    r1,-80(r1)              # open the frame and write the back chain
        stmw    r14,8(r1)               # save r14..r31
# for (i=0; i<8; i++)
#   idctrow(block+8*i);
        li      i,0
rowloop:
        rlwinm  blk,i,4,0,27            # blk = i*16: byte offset of row i
        add     blk,blk,r3
        idctrow
        addi    i,i,1
        cmpwi   i,8
        bne     rowloop
# for (i=0; i<8; i++)
#   idctcol(block+i);
        li      i,0
colloop:
        add     blk,i,i                 # blk = i*2: byte offset of column i
        add     blk,r3,blk
        idctcol
        addi    i,i,1
        cmpwi   i,8
        bne     colloop
        lmw     r14,8(r1)               # restore r14..r31
        addi    r1,r1,80
        blr
# Symbol type and size information for the two exported routines.
# NOTE(review): assuming `$` is the current location, both sizes are measured
# from this point, after the end of Fast_IDCT. The size recorded for
# Initialize_Fast_IDCT therefore also covers everything emitted after it,
# including Fast_IDCT - confirm whether its .size should instead be placed
# directly behind that routine.
.type Initialize_Fast_IDCT,@function
.size Initialize_Fast_IDCT,$-Initialize_Fast_IDCT
.type Fast_IDCT,@function
.size Fast_IDCT,$-Fast_IDCT